* Session setup: clear memory, close any open log, define the input/output
* path globals, and open the log for this run.
* NOTE(review): ${d1} is not defined anywhere in this file; it must already be
* set (presumably by a profile or top-level program) before this file runs.
clear
set mem 800m
set more off
capture log close

*do ${fmartorell_home}top_program

* global rawpath "${receipts}/thecb/thecb_07-04/"
* note: this folder is from ${receipts}/thecb/thecb_07-01/
* rawpath: folder holding the raw fixed-width files read by -readin-.
global rawpath "${d1}thecb/thecb_07-01/"

*global data "${fmartorell_home}data"
global data "${d1}data"

*log using ${fmartorell_home}programs/newreport1jr.log, replace
* Path is quoted so that a ${d1} containing spaces does not break the command.
log using "${d1}log/newreport1jr.log", replace


/*========================================================================
    REPORT1_04_05 DATA
    UPDATE OF VERA'S PROGRAM TO READ IN RAW REPORT 1 JC DATA FILES    

    This program reads in the raw THECB Report 1 files from the "srcdata"
    folder. It then performs some simple data checks on each file before
    saving it.

    Because some of the data overlap with files that we have already
    received, the bottom of the program compares the new files to the
    earlier read-in of the old data. This tests whether the data are
    identical, but not whether either version was read in correctly,
    since this program is based on the program used to read in the
    older files.
  ========================================================================*/




* Several local macros are used in this file: yr, sem, semloop.
* (semloop is not referenced in the code below.)
* The local macro semloop is in TEA year notation.
* The local macros yr and sem are in Green Center school year and semester notation.

* readin: read one raw Report 1 fixed-width file, clean it, label it, and
* save it as a Stata dataset.
*
* Arguments (positional):
*   sem    semester number; used in the raw file name, in the choice of
*          record layout, and in the output file name
*   yr     year code (e.g. 203, 204, 205); selects the record layout and is
*          used in the output file name
*   calyr  digit used in the raw file name: D1JR<sem>0<calyr>.<ext>
*   ext    optional file extension of the raw file (empty when omitted)
*
* Reads  "${rawpath}D1JR`sem'0`calyr'.`ext'"
* Writes "${d1}data/highered/d1jr`yr'_`sem'.dta" (replacing any existing file)
* and clears the data in memory afterwards.

* Drop any earlier definition so the do-file can be re-run in one session.
capture program drop readin
program define readin
    args sem yr calyr ext

    display _n(2) "Community Colleges"
    display in yellow _n(2) "Year is `yr', Semester is `sem'"

    di _n(2) "Infixing data"
    #delimit ;

    /* Columns 1-114 are laid out identically in both record formats. */
    local common
        byte  rectype    1
        long  school     2-7
        str9  altpid     8-16
        str1  asex       17
        byte  type       18
        str4  byrs       19-22
        str2  bmos       23-24
        str2  bdas       25-26
        str1  tutstats   27
        int   res        28-30
        str6  transfers  31-36
        long  indist     37-40
        long  outdist    41-44
        long  idist      45-48
        long  odist      49-52
        long  major      53-60
        int   credit     61-62
        str2  ecodes     63-64
        str1  remotes    65
        str1  majs       66
        str1  ethnics    67
        str12 x          68-79
        byte  semester   80
        long  year       81-84
        str1  flexs      85
        str1  unused     86
        str1  acadiss    87
        str1  ecodiss    88
        str1  disables   89
        str1  leps       90
        str1  pgbs       91
        str1  dishomes   92
        str1  singpars   93
        str3  schs       94-96
        str3  interachs  97-99
        str3  intertecs  100-102
        str2  conhss     103-104
        str2  schuglims  105-106
        str2  devedschs  107-108
        str2  tottcschs  109-110
        str3  dvedovers  111-113
        str1  intents    114
    ;

    /* 120-column layout: year 203 and the first semester of year 204. */
    if `yr'==203 | (`yr'==204 & `sem'==1) {;
        infix `common'
            str5 unused2 115-119
            str1 update  120
            using "${rawpath}D1JR`sem'0`calyr'.`ext'";
    };
    /* 126-column layout: all other files. */
    else {;
        infix `common'
            str1 unused2           115
            long tech_ch_nostfnd   116-118
            long acad_ch_nostfnd   119-121
            int  tech_sch_nostfnd  122-123
            int  acad_sch_nostfnd  124-125
            str1 update            126 /* not really the update variable, placeholder */
            using "${rawpath}D1JR`sem'0`calyr'.`ext'";
    };

    #delimit cr

    * The trailing record (school == 778899) must be dropped before any
    * other processing.
    di _n(2) "Dropping the trailing record:"
    list if school == 778899
    drop if school == 778899

    display in white "Check years"
    qui generate cur_year=`yr'
    tabulate cur_year semester
    tabulate year semester

    if (`yr'==202 & `sem'==1) {
        di "Replacing the wrong value of school where it is equal to '01038?' in the raw data"
        replace school=10387 if school==.
    }

    * ---- Date of birth ------------------------------------------------
    display _n(2) "Creating bdate"
    display "Looking at byr"
    count if missing(byrs)
    gen long byr=real(byrs)
    tab byr if byr<1900 | byr>1999
    * Shift years that are off by a millennium.  Missing byr also satisfies
    * byr>2000 in Stata, but (missing - 1000) is still missing.
    replace byr=byr+1000 if byr<1000
    replace byr=byr-1000 if byr>2000

    display "Looking at bmo"
    count if missing(bmos)
    gen int bmo=real(bmos)

    display "looking at bda"
    count if missing(bdas)
    generate int bda=real(bdas)

    display "Generating birthday"
    gen bdate = mdy(bmo,bda,byr)
    format bdate %dN/D/CY
    sum bdate
    tabulate bdate in 1/4

    di _n(2) "Dropping byr, bmo, bda"
    drop byr bmo bda byrs bmos bdas

    * ---- Tuition status -----------------------------------------------
    display _n(2) "Generating Variable tutstat"
    count if missing(tutstats)
    gen long tutstat=real(tutstats)
    tab tutstat, m
    if (`yr'==202 & `sem'==1) {
        replace tutstat=. if tutstat==8
    }
    * Letter codes A-D are mapped onto numeric codes 6-9.
    replace tutstat=6 if tutstats=="A"
    replace tutstat=7 if tutstats=="B"
    replace tutstat=8 if tutstats=="C"
    replace tutstat=9 if tutstats=="D"
    tab tutstat,m
    tab tutstat tutstats, m
    drop tutstats

    label define tutstat 1 "In-District" 2 "Out-of-Distr." 3 "Nonresident" 5 "Waiver" 6 "Res,In-D(A)" 7 "Res,Out-D(B)" 8 "Pending,In-D(C)" 9 "Pending,Out-D(D)"
    label values tutstat tutstat
    tab tutstat, m

    di _n(2) "Attaching notes to variable 'tutstat'"
    note tutstat: See Reporting & Procedures Manual for detailed value labels

    * ---- Sex ----------------------------------------------------------
    display _n(2) "Creating sex"
    gen byte sex = 0 if asex=="F"
    replace sex = 1 if asex=="M"

    label define sex 0 "Female" 1 "Male"
    label values sex sex
    tab sex, missing
    tab sex asex, missing nolabel
    di "Dropping asex"
    drop asex

    * ---- Simple string-to-numeric conversions -------------------------
    display _n(2) "transfer"
    count if missing(transfers)
    gen long transfer=real(transfers)
    drop transfers

    display _n(2) "ecode"
    count if missing(ecodes)
    gen byte ecode=real(ecodes)
    tab ecode, m
    tab ecodes
    drop ecodes

    display _n(2) "remote"
    count if missing(remotes)
    gen byte remote=real(remotes)
    drop remotes

    display _n(2) "maj"
    count if missing(majs)
    gen byte maj=real(majs)
    tab maj, m
    drop majs

    display _n(2) "flex"
    count if missing(flexs)
    gen byte flex=real(flexs)
    tab flex, m
    drop flexs

    note flex: Flexible Entry accounts for many duplicate observations. See Reporting and Procedures Manual p 1.1

    * One-character flags: each raw string variable <name>s becomes the byte
    * variable <name>.  All of them, including singpar, are tabulated with
    * missing values shown.
    foreach v in acadis ecodis disable lep pgb dishome singpar {
        display _n(2) "`v'"
        count if missing(`v's)
        generate byte `v'=real(`v's)
        tabulate `v's
        tabulate `v', m
        drop `v's
    }

    display _n(2) "sch"
    count if missing(schs)
    generate long sch=real(schs)
    tabulate sch, m
    tab schs
    drop schs

    display _n(2) "interach"
    count if missing(interachs)
    generate int interach=real(interachs)
    tabulate interach,m
    tab interachs
    drop interachs

    display _n(2) "intertec"
    count if missing(intertecs)
    generate int intertec=real(intertecs)
    tab intertec,m
    tab intertecs
    drop intertecs

    display _n(2) "conhs"
    count if missing(conhss)
    generate int conhs=real(conhss)
    tabulate conhs,m
    tabulate conhss
    drop conhss

    display _n(2) "schuglim"
    count if missing(schuglims)
    generate int schuglim=real(schuglims)
    tabulate schuglims
    tabulate schuglim,m
    drop schuglims

    display _n(2) "devedsch"
    count if missing(devedschs)
    generate int devedsch=real(devedschs)
    tabulate devedsch,m
    tabulate devedschs
    drop devedschs

    display _n(2) "tottcsch"
    count if missing(tottcschs)
    generate int tottcsch=real(tottcschs)
    tabulate tottcsch,m
    tabulate tottcschs
    drop tottcschs

    display _n(2) "dvedover"
    count if missing(dvedovers)
    generate long dvedover=real(dvedovers)
    tabulate dvedover,m
    tabulate dvedovers
    drop dvedovers

    display _n(2) "intent"
    count if missing(intents)
    generate byte intent=real(intents)
    tab intents if missing(intent)
    tab intent,m
    tab intents
    drop intents

    * ---- Ethnicity ----------------------------------------------------
    display _n(2) "ethnic"
    tab ethnics
    count if missing(ethnics)
    gen byte ethnic=real(ethnics)
    * Recode the raw codes; raw 7 becomes missing.
    recode ethnic 1=5 3=4 2=3 4=2 5=1 7=.
    notes ethnic: Codes changed from raw data to standard Green Center codes
    tab ethnic, missing
    drop ethnics

    * ---- Filler fields: inspect, then drop ----------------------------
    display _n(2) "Tabulate x"
    tab x
    su x
    drop x

    di _n(2) "Tabulation of Variable 'unused'"
    tab unused
    drop unused

    display _n(2) "Tabulate 'unused2'"
    tabulate unused2, m
    drop unused2

    di _n(2) "Tabulation of Variable 'update'"
    cap tabulate update, m

    label define ethnic 1 "Nat. American" 2 "Asian" 3 "Afr. American" 4 "Hispanic" 5 "Anglo" 6 "Foreign" 7 "Multiracial" 8 "Other"
    label values ethnic ethnic

    * ---- Variable labels ----------------------------------------------
    * major, semester and update are given no label text, which leaves
    * them unlabeled.
    display _n(2) "Labeling the Variables in the File"
    label var rectype "Report (always 1)"
    label var school "College FICE Code"
    label var altpid "Encrypted SSN"
    label var sex "Gender"
    label var type "Student Classification"
    label var bdate "Date of Birth"
    label var tutstat "Tuition Status"
    label var res "Residence"
    label var transfer "FICE Code of Previous College"
    label var indist "Academic Hrs. In-District"
    label var outdist "Academic Hrs. Out-District"
    label var idist "Voc-Tech. Hrs. In-District"
    label var odist "Voc-Tech. Hrs. Out-District"
    label var major
    label var credit "Semester Credit Hr. Load"
    label var ecode "Tuition Exemption/Waiver Code"
    label var remote "Remote Campus"
    label var maj "Type of Major"
    label var ethnic "Race/ Ethnicity"
    label var semester
    label var year "Current Year"
    label var flex "Flexible Entry"
    label var acadis "Academic Disadvantage"
    label var ecodis "Economic Disadvantage"
    label var disable "Disability"
    label var lep "Limited English Proficiency"
    label var pgb "Program to Eliminate Gender Bias"
    label var dishome "Displaced Homemaker"
    label var singpar "Single Parent"
    label var sch "Devel. Ed. Hrs. in Excess of State Limit"
    label var interach "Inter-Inst. Acad. Hrs"
    label var intertec "Inter-Inst. Tech. Hrs."
    label var conhs "Concurrent SCH"
    label var schuglim "State Funded SCH"
    label var devedsch "Devel. Ed SCH"
    label var tottcsch "Total Technical SCH"
    label var dvedover "Excess SCH-Devel. Ed."
    label var intent "Degree Sought"
    label var update

    * ---- Variable order -----------------------------------------------
    * Variables not listed here (e.g. cur_year) stay after the listed ones.
    di _n(2) "Ordering the Data"

    #delimit ;
    order
        rectype school altpid sex type bdate tutstat res transfer
        indist outdist idist odist major credit ecode remote maj
        ethnic semester year flex acadis ecodis disable lep pgb
        dishome singpar sch interach intertec conhs schuglim
        devedsch tottcsch dvedover intent update;
    #delimit cr

    di _n(2) "Compressing the Data:"
    compress

    * ---- Duplicate check ----------------------------------------------
    * flex2 is flex with missing treated as 0, so that every record falls
    * into some flex category in the tabulations below.
    di _n(2) "Checking Duplicate records NOT acounted for by flex-entry"
    gen flex2=0
    replace flex2=flex if flex~=.
    sort altpid sex school flex2
    by altpid sex school: gen countN=_N
    by altpid sex school: gen countn=_n
    tab countn flex2, m
    table countn flex2 countN
    drop flex2 countn countN

    di "Descriptive Statistics"
    d
    summarize

    * sex-flex is a variable range in the order set above.
    display _n(2) "Sorting the data"
    sort altpid school sex-flex

    di _n(2) "Finally, Saving of the File"
    *save ${fmartorell_home}data/highered/d1jr`yr'_`sem', replace
    * Path is quoted so that a ${d1} containing spaces does not break -save-.
    save "${d1}data/highered/d1jr`yr'_`sem'", replace
    clear

end

* Run -readin- once per raw file.  Each quoted entry is one argument list:
*   semester, year code, calendar-year digit, and (optionally) file extension.
* Not run: "1 203 2 T"
*   "2 203 3 T"  spring of tspyr=203
*   "1 204 3"    fall of tspyr=204 (calyear=2003)
#delimit ;
foreach spec in
    "2 203 3 T"
    "1 204 3"
    "2 204 4"
    "3 204 4"
    "4 204 4"
    "1 205 4"
    "2 205 5"
    "3 205 5"
    "4 205 5"
{;
    readin `spec';
};
#delimit cr


log close
